
*----------------------------------------------------------------------------*

**# Set Paths

*----------------------------------------------------------------------------*


//ssc install estout

* Folder locations. Edit the two placeholders before running; the data and
* output folders are derived from the replication folder.
global raw_data_dir "<add path to your working directory>"
global replica_dir  "<add path to your working directory>"
global data_dir     "$replica_dir/data"
global output_dir   "$replica_dir/output"


* Load the stacked teacher panel (already merged to the SNED records).
use "$raw_data_dir/<teacher panel data stacked and merged to sned>.dta", clear

* Quick look at the panel: rows per year and total row count.
tabulate year
count

* Discard rows that lack any of the three identifiers used throughout
* (teacher id mrun, school id rbd, SNED round sned_yr).
drop if rbd==. | mrun==. | sned_yr==.

*----------------------------------------------------------------------------*

**# Teacher level data -- TEACHER-level analysis

*----------------------------------------------------------------------------*


**#  Index variables
	

* Running variables: distance of the index (indicer) from each cutoff.
* st_ind25 is measured against threshold_1_hg and is left missing for
* res_sned==3 (the non-winners).
generate st_ind25 = indicer - threshold_1_hg if res_sned != 3

* st_ind10 is measured against threshold_2_hg and is left missing for
* res_sned==1 (the 25% treated group).
generate st_ind10 = indicer - threshold_2_hg if res_sned != 1


* Squared running variables.
generate st_ind25_sq = st_ind25 * st_ind25
generate st_ind10_sq = st_ind10 * st_ind10

* Treatment dummies, defined on the same samples as the running variables.
* Rows whose res_sned is missing are coded 0, not missing.
generate treat25 = (res_sned == 1) if res_sned != 3

generate treat10 = (res_sned == 2) if res_sned != 1

* Treatment x running-variable interactions (linear, then quadratic).
generate inter25 = treat25 * st_ind25
generate inter10 = treat10 * st_ind10

generate inter25_sq = treat25 * st_ind25_sq
generate inter10_sq = treat10 * st_ind10_sq


*----------------------------------------------------------------------------*

**# Derived Variables   


* Clean the two classroom-hour fields before adding them up:
*   - a value of -1 in horas1 is treated as zero hours;
*   - when only one of the two fields is missing, the missing one is set to 0
*     so the sum is defined (rows with both missing stay missing).
replace horas1 = 0 if horas1 == -1
replace horas2 = 0 if horas2 == . & horas1 != .
replace horas1 = 0 if horas1 == . & horas2 != .

/*
	tea_classhrs: hours the classroom teacher works at the corresponding
	teaching level (horas1 + horas2).
	NOTE: up to and including 2014 these fields are recorded in pedagogic
	hours (45 minutes); for later years they are chronological hours, so they
	are multiplied by 60/45 to express every year in pedagogic hours.
*/
generate tea_classhrs = (horas1 + horas2) * cond(year > 2014, 60/45, 1)



* Teacher date of birth (doc_fec_nac): stored as yyyymm in 2015-2019 and as
* yyyymmdd in 2003-2014, so it has to be split differently by period.

bysort year : sum doc_fec_nac

* -nsplit- is a community-contributed command. Install it from SSC only when
* it is not already on the ado-path, so that re-running the do-file does not
* need network access.
capture which nsplit
if _rc ssc install nsplit

* Years before 2015: split yyyymmdd into year / month / day.
nsplit doc_fec_nac if year<2015 , digits(4 2 2) gen(tea_bd_yr tea_bd_mth tea_bd_day)
* Years after 2014: split yyyymm; without gen() the pieces are named
* doc_fec_nac1 (year) and doc_fec_nac2 (month).
nsplit doc_fec_nac if year>2014 , digits(4 2)

* Fill the year and month for the later period from the second split
* (tea_bd_day stays missing for those rows).
replace tea_bd_yr =doc_fec_nac1 if tea_bd_yr==.
replace tea_bd_mth=doc_fec_nac2 if tea_bd_mth==.

* Age is calendar year minus birth year; month and day are not used.
gen tea_age= year - tea_bd_yr

* tea_admin = 1 for every id_ifp other than 1 or 17
* (a missing id_ifp is therefore also coded 1).
generate tea_admin = !inlist(id_ifp, 1, 17)
label variable tea_admin "Administrative duties primarily"

* tit_educ = 1 when tit_id is 1, 0 for other codes, missing when tit_id is missing.
tabulate tit_id
generate tit_educ = (tit_id == 1) if tit_id != .
label variable tit_educ "Has an Education Degree"

rename contract permanent
label variable permanent "Permanent contract"


* municip = 1 when cod_depe is 1 or 2, and 0 otherwise (including missing cod_depe).
generate municip = inlist(cod_depe, 1, 2)

* Shorter names used in the rest of the file.
rename cluster hg

rename ano_servicio_sistema exp


* multsch: the teacher has more than one school record in the same year.
* nsch counts the non-missing rbd values per teacher-year.
bysort year mrun: egen nsch = count(rbd)

generate multsch = (nsch > 1) if rbd != .



* AEP once received lasts 10 years
* NOTE(review): the code below never applies a 10-year upper bound; aep10
* stays 1 for every year from aep_yr onward. Confirm whether a cap such as
* year < aep_yr + 10 was intended.

* -fillmissing- is a community-contributed command; install it from SSC only
* when it is not already available.
capture which fillmissing
if _rc ssc install fillmissing

* aep_yr: year of a row with aep_selected==1, copied to all rows of the
* same teacher (mrun).
gen aep_yr=.
bysort mrun: replace aep_yr=year if  aep_selected==1
bysort mrun: fillmissing aep_yr, with(any)

* aep_appl_yr: year of a row with a recorded AEP result (aep_selected 0 or 1),
* copied to all rows of the same teacher.
gen aep_appl_yr=.
bysort mrun: replace aep_appl_yr=year if  aep_selected==1 | aep_selected==0
bysort mrun: fillmissing aep_appl_yr, with(any)


* applicants of AEP: row-level flag, 1 on rows where aep_selected is non-missing
gen aep_appl=(aep_selected!=.)

* ever-winners of AEP: 1 from the year in aep_yr onward, 0 in earlier years
gen 	aep10=1 if year>=aep_yr
replace aep10=0 if year<aep_yr
* never-winners: any row still missing is set to 0
replace aep10=0 if aep10==.





/*------------------------------------------------------------------------------
	Create y_tm1 i.e.(t-1) and y_tp1 i.e. (t+1) 							
*/

* Panel unit is the teacher-school pair: one numeric id per (mrun, rbd).
sort 	mrun rbd year
egen 	mrun_rbd=group(mrun rbd)

* -unique- is a community-contributed command; install it from SSC only when
* it is not already available.
capture which unique
if _rc ssc install unique

* Diagnostics: number of distinct pairs, ids and id-year combinations.
unique 	mrun rbd
unique 	mrun_rbd
unique  mrun_rbd year


xtset   mrun_rbd year, yearly
 
* Lead (t+1) and lag (t-1) of the school id, the SNED result and the year
* within each teacher-school series; missing when the pair has no record in
* the adjacent year.
foreach var in 	rbd res_sned year {  
gen 	`var'_tp1=F.`var'
gen 	`var'_tm1=L.`var'

}


/* 	dropped_tp1 and newtea, illustrated for one teacher (mrun 12)

mrun	mrun_rbd		rbd_t-1		rbd_t		rbd_t+1	 	dropped_t+1		newtea_t	
12		12_1			.			1			.			1				1
12		12_2			2			2			.			1				0
12		12_3			.			3			3			0				1
12		12_4 			.			.			.			. 				.	

	dropped_tp1 = 1 when the teacher-school pair has a record in t but none
	in t+1, and 0 otherwise. It only says the pair is absent from the
	"mrun_rbd year" series next year; the same teacher may be working in
	another rbd.

	newtea = 1 when the teacher-school pair has a record in t but none in
	t-1, and 0 otherwise.
*/

local flags  "dropped_tp1 newtea"
local probes "rbd_tp1 rbd_tm1"

forvalues k = 1/2 {
	local flag : word `k' of `flags'
	local nbr  : word `k' of `probes'
	* 1 if the school id is present now but missing in the adjacent year
	generate `flag' = (rbd != . & `nbr' == .)
}

label variable newtea "New teacher (at time t)"
label variable dropped_tp1 "Teacher drops this school next year (t+1)"


*
xtset mrun_rbd year, yearly

* Two families of variables get a lead (t+1) and a lag (t-1):
* hours, and teacher/school covariates.
local hour_vars horas_contrato tea_classhrs
local ctrl_vars municip rural_rbd gender tit_educ permanent tea_admin multsch

foreach x in `hour_vars' `ctrl_vars' {
	generate `x'_tp1 = F.`x'
	generate `x'_tm1 = L.`x'
}

/*
	Filling missing leads/lags of a teacher-school pair.
	A "." lead or lag means the pair has no record in that year. It is
	filled ONLY when the pair has a non-missing value at time t.

	- Hours: filled with 0 (not working at that school means no hours).
	- Covariates (e.g. permanent, a control variable): cannot sensibly switch
	  to 0 at the mrun_rbd level, so they are filled with the value at time t;
	  otherwise the first two example rows would be lost.

	e.g.: 			horas_contrato			permanent
					-------------------		-------------------
	mrun_rbd		t-1		t		t+1		t-1		t		t+1
	-----------------------------------------------------------
	12_1			.		20		.		.		1		.
	12_3			.		20		30		.		0		0
	12_4			.		.		.		.		.		.
	12_5			10		15		20		1		1		1

	becomes:

	12_1			0		20		0		1		1		1
	12_3			0		20		30		0		0		0
	12_4			.		.		.		.		.		.
	12_5			10		15		20		1		1		1
*/

* Hours: absent neighbouring year -> zero hours.
foreach x of local hour_vars {
	foreach when in tp1 tm1 {
		replace `x'_`when' = 0 if `x'_`when' == . & `x' != .
	}
}

* Covariates: absent neighbouring year -> carry the time-t value.
foreach x of local ctrl_vars {
	foreach when in tp1 tm1 {
		replace `x'_`when' = `x' if `x'_`when' == . & `x' != .
	}
}


* Number of teacher records (non-missing mrun) per school-year, computed
* before the age restriction below is applied.
bysort rbd year: egen n_tea = count(mrun)
levelsof n_tea


* Age restriction: drop teachers younger than 22, men (gender==1) older than
* 65 and women (gender==0) older than 60.
* Stata treats a missing value as larger than any number, so rows with a
* missing tea_age and gender equal to 0 or 1 are dropped as well.
drop if tea_age < 22 | (tea_age > 65 & gender == 1) | (tea_age > 60 & gender == 0)


* Full teacher-school panel, used by both the teacher-level and the
* school-level steps.
save "$data_dir/tlvl.dta", replace


/*------------------------------------------------
	
	Teacher - level data -- final DROP 

*/

* Teacher-level analysis file: restrict the panel to the SNED-year rows,
* attach the mean cutoffs used for the heterogeneous-effects analysis and
* save the result as tlvl_fin.dta.
use "$data_dir/tlvl.dta", clear 


* Keep only rows observed in their own SNED year. This is important: the panel
* is no longer needed because the (t-1) and (t+1) values are already stored
* in the *_tm1 / *_tp1 variables.
keep if year == sned_yr
keep if inlist(sned_yr, 2006, 2008, 2010, 2012, 2014)

* Keep basica and media and multigrade (tipo_est codes A-D)

keep if inlist(tipo_est, "A", "B", "C", "D")


*** Create the cutoff mean for the heterogenous effects 

gen cut25=threshold_1_hg 
gen cut10=threshold_2_hg 

* Park the teacher-level rows in a Stata-managed temporary file instead of a
* fixed "$data_dir/temp.dta": nothing in the data folder is overwritten and
* the file is removed automatically, even if the run stops half-way.
* (The local macro only exists while this do-file runs as a whole.)
tempfile tlvl_sample
save "`tlvl_sample'", replace

* One row per homogeneous group (hg) and SNED round.
* NOTE(review): (first) takes every value from whichever record happens to be
* first in each hg x sned_yr cell, so the st_ind* and municip filters used
* below depend on the sort order of the data - confirm this is intended.
collapse (first) cut25 cut10 st_ind25 st_ind10 municip , by(hg sned_yr)

foreach i in 25 10 {
	gen mean_cut`i'=. 
	gen mean_cut`i'_pv=. 
	gen mean_cut`i'_mv=. 

	* Per SNED round: mean cutoff over all groups, over groups with
	* municip==0 (_pv) and over groups with municip==1 (_mv). Only r(mean)
	* is used, so the percentiles of -summarize, detail- are not computed.
	foreach yr in 2006 2008 2010 2012 2014 {
		summarize cut`i' if st_ind`i'!=. & sned_yr==`yr'
		replace mean_cut`i'=r(mean) if st_ind`i'!=. & sned_yr==`yr'

		summarize cut`i' if st_ind`i'!=. & municip==0 & sned_yr==`yr'
		replace mean_cut`i'_pv=r(mean) if st_ind`i'!=. & municip==0 & sned_yr==`yr'

		summarize cut`i' if st_ind`i'!=. & municip==1 & sned_yr==`yr'
		replace mean_cut`i'_mv=r(mean) if st_ind`i'!=. & municip==1 & sned_yr==`yr'
	}
}

* Bring the group-level means back onto the teacher-level rows.
keep mean_* hg sned_yr
merge 1:m hg sned_yr using "`tlvl_sample'"
drop _merge



la var treat25 	"Full vs. Partial Bonus"
la var treat10 	"Partial vs. No Bonus"


save "$data_dir/tlvl_fin.dta", replace


				
*----------------------------------------------------------------------------*

**# School level data -- SCHOOL level analysis , Index analysis

*----------------------------------------------------------------------------*

* Start again from the full teacher-school panel and aggregate it to one row
* per school (rbd) and year.
use "$data_dir/tlvl.dta", clear

* Averaged over the teacher records of the school-year.
local avg_vars dropped_tp1 newtea exp aep10 aep_appl permanent gender tit_educ multsch tea_admin horas_contrato tea_classhrs

* Taken from the first non-missing record of the school-year.
local school_vars municip rural_rbd sned_yr cod_depe tipo_est n_tea
local sned_vars hg indicer threshold_1_hg threshold_2_hg res_sned treat10 treat25 st_ind10 st_ind25 st_ind10_sq st_ind25_sq inter10 inter25 inter10_sq inter25_sq

collapse (mean) `avg_vars' (firstnm) `school_vars' `sned_vars', by(year rbd)



* Outcome variables: school-level leads (t+1) and lags (t-1)

xtset rbd year

local school_series newtea exp aep10 horas_contrato tea_classhrs permanent gender tit_educ multsch tea_admin municip rural_rbd

foreach s of local school_series {
	generate `s'_tp1 = F.`s'
	generate `s'_tm1 = L.`s'
}

* dropped_tp1 (made at teacher level, averaged by the collapse) refers to
* exits between t and t+1. Shifting it back one year gives the value for t,
* and shifting once more gives the value for t-1.
generate dropped     = L.dropped_tp1
generate dropped_tm1 = L.dropped


order rbd year dropped dropped_tm1 dropped_tp1
sort rbd year

label variable dropped_tp1 "Turnover - lagged attrition next year"
label variable newtea_tp1 "Turnover - prop new next year"


* One indicator per SNED round (sned_yr_*) and per homogeneous group (hg_*).
tabulate sned_yr, gen(sned_yr_)
tabulate hg, gen(hg_)





* Variables to test the probability of winning SNED again.
* The outcome looked at is the school's SNED result two years ahead.

xtset rbd year

generate res_sned_tp2 = F2.res_sned

sort rbd year
order rbd year res_sned res_sned_tp2

* win_next: result two years ahead is below 3; missing when that result is missing.
generate win_next = (res_sned_tp2 < 3) if res_sned_tp2 != .
tabulate win_next

* win_next25: result two years ahead equals 1; missing when that result is missing.
generate win_next25 = (res_sned_tp2 == 1) if res_sned_tp2 != .
tabulate win_next25

* win_next10: result two years ahead equals 2; missing when that result is
* missing or equals 1.
generate win_next10 = (res_sned_tp2 == 2) if res_sned_tp2 != . & res_sned_tp2 != 1
tabulate win_next10


order rbd year res_sned  res_sned_tp2 win_next25 win_next10
sort rbd year




/*------------------------------------------------
		
		School level data -- Final DROP 
*/

* Keep only school rows observed in their own SNED year (this is important:
* leads and lags are already stored in the *_tp1 / *_tm1 variables).
keep if year == sned_yr
keep if inlist(sned_yr, 2006, 2008, 2010, 2012, 2014)

* Keep basica and media and multigrade (tipo_est codes A-D)

keep if inlist(tipo_est, "A", "B", "C", "D")

* Create the cutoff mean for the heterogenous effects 

gen cut25=threshold_1_hg 
gen cut10=threshold_2_hg 

* Park the school-level rows in a Stata-managed temporary file instead of a
* fixed "$data_dir/temp.dta": nothing in the data folder is overwritten and
* the file is removed automatically, even if the run stops half-way.
* (The local macro only exists while this do-file runs as a whole.)
tempfile slvl_sample
save "`slvl_sample'", replace

* One row per homogeneous group (hg) and SNED round.
* NOTE(review): (first) takes every value from whichever record happens to be
* first in each hg x sned_yr cell, so the st_ind* and municip filters used
* below depend on the sort order of the data - confirm this is intended.
collapse (first) cut25 cut10 st_ind25 st_ind10 municip , by(hg sned_yr)

foreach i in 25 10 {
	gen mean_cut`i'=. 
	gen mean_cut`i'_pv=. 
	gen mean_cut`i'_mv=. 

	* The median_cut* placeholders of the earlier version were never filled
	* and were discarded by -keep mean_*- below, so they are no longer created.

	* Per SNED round: mean cutoff over all groups, over groups with
	* municip==0 (_pv) and over groups with municip==1 (_mv). Only r(mean)
	* is used, so the percentiles of -summarize, detail- are not computed.
	foreach yr in 2006 2008 2010 2012 2014 {
		summarize cut`i' if st_ind`i'!=. & sned_yr==`yr'
		replace mean_cut`i'=r(mean) if st_ind`i'!=. & sned_yr==`yr'

		summarize cut`i' if st_ind`i'!=. & municip==0 & sned_yr==`yr'
		replace mean_cut`i'_pv=r(mean) if st_ind`i'!=. & municip==0 & sned_yr==`yr'

		summarize cut`i' if st_ind`i'!=. & municip==1 & sned_yr==`yr'
		replace mean_cut`i'_mv=r(mean) if st_ind`i'!=. & municip==1 & sned_yr==`yr'
	}
}

* Bring the group-level means back onto the school-level rows.
keep mean_* hg sned_yr
merge 1:m hg sned_yr using "`slvl_sample'"
drop _merge


* Variable labels for the school-level file.

* Baseline (t-1) covariates
label variable gender_tm1     "Teacher men, baseline"
label variable tit_educ_tm1   "Education degree, baseline"
label variable municip_tm1    "Municipal school, baseline"
label variable rural_rbd_tm1  "Rural school, baseline"
label variable permanent_tm1  "Permanent contract, baseline"
label variable multsch_tm1    "Teach in multiple schools, baseline"
label variable tea_admin_tm1  "Administrative duties primarily, baseline"

* Hours: baseline (t-1) and outcome (t+1)
label variable horas_contrato_tm1  "Contract hours, baseline"
label variable tea_classhrs_tm1    "Classroom teaching hours , baseline"

label variable horas_contrato_tp1  "Contract hours, outcome"
label variable tea_classhrs_tp1    "Classroom teaching hours , outcome"

* Turnover, experience and AEP: baseline (t-1)
label variable dropped_tm1  "Turnover: Lagged attrition, baseline"
label variable newtea_tm1   "Turnover: Prop. new teachers, baseline"
label variable exp_tm1      "Mean teacher experience, baseline"
label variable aep10_tm1    "Prop. AEP certified, baseline"

* Turnover, experience and AEP: outcome (t+1)
label variable dropped_tp1  "Turnover: Lagged attrition, outcome"
label variable newtea_tp1   "Turnover: Prop. new teachers, outcome"
label variable exp_tp1      "Mean teacher experience, outcome"
label variable aep10_tp1    "Prop. AEP certified, outcome"

* Treatment indicators and running variables
label variable treat25  "Full vs. Partial Bonus"
label variable treat10  "Partial vs. No Bonus"

label variable st_ind25 "100% vs. 60%"
label variable st_ind10 "60% vs. none"


* Final school-level analysis file.
save "$data_dir/slvl_fin.dta", replace
